Coping Flexibility and Strategy Use Predict Burnout: A Longitudinal Study
Authors
Affiliation
Rebecca Kirkham
Monash University
Joshua F. Wiley
Monash University
Published
January 1, 2026
1 Loading Packages
Code
library(readxl)library(tidyr)library(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
Code
library(psych)
Warning: package 'psych' was built under R version 4.4.2
Code
library(writexl)library(mice)
Warning: package 'mice' was built under R version 4.4.2
Attaching package: 'mice'
The following object is masked from 'package:stats':
filter
The following objects are masked from 'package:base':
cbind, rbind
Code
library(scales)
Attaching package: 'scales'
The following objects are masked from 'package:psych':
alpha, rescale
Code
library(ggplot2)
Attaching package: 'ggplot2'
The following objects are masked from 'package:psych':
%+%, alpha
Code
library(gridExtra)
Attaching package: 'gridExtra'
The following object is masked from 'package:dplyr':
combine
Code
library(car)
Warning: package 'car' was built under R version 4.4.2
Loading required package: carData
Warning: package 'carData' was built under R version 4.4.2
Attaching package: 'car'
The following object is masked from 'package:psych':
logit
The following object is masked from 'package:dplyr':
recode
Code
library(lmtest)
Warning: package 'lmtest' was built under R version 4.4.2
Loading required package: zoo
Attaching package: 'zoo'
The following objects are masked from 'package:base':
as.Date, as.Date.numeric
Code
library(lm.beta)
Warning: package 'lm.beta' was built under R version 4.4.3
2 Read in the Data
Code
# Read in the two analysis datasets. Both were produced by the preparation,
# scoring, and imputation steps documented later in this file.
# IMPROVED: the directory path was repeated verbatim three times; define it
# once and build the file paths with file.path().
data_dir <- "~/Clin PhD/Research Project/Study. 3 Quantitative Study ER flex/Data/Burnout Paper"

# setwd() kept deliberately: later chunks write outputs with relative paths
# (e.g. "correlation_table.csv") and rely on this working directory.
setwd(data_dir)

# Complete-case dataset: from df, AFTER subsetting to complete cases and
# removing 3x influential PIDs identified by assumption checks on the
# complete case analyses.
df_CC <- read_excel(file.path(data_dir, "Dataset_clean+scored_CCExcl_100725.xlsx"))

# Multiple-imputation dataset: from df, AFTER 3x influential PIDs were
# excluded based on assumption checks from the MI models.
df <- read_excel(file.path(data_dir, "Dataset_clean+scored_MIExcl_100725.xlsx"))
Interim steps included data preparation, scoring, multiple imputation preparation, and assumption checking for the complete case dataset. These sections are presented below the main analysis code. They were conducted on earlier versions of the dataset, with outputs saved and used to generate the final dataset for analysis. The workflow is documented in full to provide transparency and reproducibility.
Note that throughout the data file the term ER (emotion regulation) is used interchangeably with Coping.
3 Checking assumptions for the complete case dataset analysis
# CFS: internal consistency (Cronbach's alpha) for the Evaluation subscale at T1.
# Define all relevant column names explicitly for Evaluation
Evaluation_alpha <- df[, c("T1_CFS_2_R", "T1_CFS_6", "T1_CFS_7_R", "T1_CFS_8", "T1_CFS_9")]
# NOTE(review): psych::alpha() warns (output below) that T1_CFS_2_R correlates
# negatively with the first principal component -- confirm the item's
# reverse-scoring before interpreting this alpha.
psych::alpha(Evaluation_alpha)
Warning in psych::alpha(Evaluation_alpha): Some items were negatively correlated with the first principal component and probably
should be reversed.
To do this, run the function again with the 'check.keys=TRUE' option
Some items ( T1_CFS_2_R ) were negatively correlated with the first principal component and
probably should be reversed.
To do this, run the function again with the 'check.keys=TRUE' option
# Model comparisons via mice::D1 (multivariate Wald test pooled across
# imputations): does adding all coping/ER strategy predictors (model B)
# improve on the covariates-only model (model A)? One comparison per BAT-12
# burnout subscale. NOTE(review): the model objects are fitted elsewhere in
# this file; this chunk only compares them.

# -- Emotional impairment: A vs B --
# Full model
fit_full <- BAT12Emo_MImodel_B_ERall # model with more predictors
# Reduced model
fit_reduced <- BAT12Emo_MImodel_A_Cov # nested model
# Run D1 test
BAT12Emo_MImodelAB_comparison <- mice::D1(fit_full, fit_reduced)
# View results
print(BAT12Emo_MImodelAB_comparison)

# -- Mental distance: A vs B --
# Full model
fit_full <- BAT12Ment_MImodel_B_ERall # model with more predictors
# Reduced model
fit_reduced <- BAT12Ment_MImodel_A_Cov # nested model
# Run D1 test
BAT12Ment_MImodelAB_comparison <- mice::D1(fit_full, fit_reduced)
# View results
print(BAT12Ment_MImodelAB_comparison)

# -- Exhaustion: A vs B --
# Full model
fit_full <- BAT12Exh_MImodel_B_ERall # model with more predictors
# Reduced model
fit_reduced <- BAT12Exh_MImodel_A_Cov # nested model
# Run D1 test
BAT12Exh_MImodelAB_comparison <- mice::D1(fit_full, fit_reduced)
# View results
print(BAT12Exh_MImodelAB_comparison)
# A to C: does adding the CST (cognitive switching task) predictor (model C)
# improve on the covariates-only model (model A)? mice::D1 Wald comparison,
# one per BAT-12 subscale. Model objects are fitted elsewhere in this file.

# -- Cognitive impairment: A vs C --
# Full model
fit_full <- BAT12Cog_MImodel_C_CST # model with more predictors
# Reduced model
fit_reduced <- BAT12Cog_MImodel_A_Cov # nested model
# Run D1 test
BAT12Cog_MImodelAC_comparison <- mice::D1(fit_full, fit_reduced)
# View results
print(BAT12Cog_MImodelAC_comparison)

# -- Emotional impairment: A vs C --
# Full model
fit_full <- BAT12Emo_MImodel_C_CST # model with more predictors
# Reduced model
fit_reduced <- BAT12Emo_MImodel_A_Cov # nested model
# Run D1 test
BAT12Emo_MImodelAC_comparison <- mice::D1(fit_full, fit_reduced)
# View results
print(BAT12Emo_MImodelAC_comparison)

# -- Mental distance: A vs C --
# Full model
fit_full <- BAT12Ment_MImodel_C_CST # model with more predictors
# Reduced model
fit_reduced <- BAT12Ment_MImodel_A_Cov # nested model
# Run D1 test
BAT12Ment_MImodelAC_comparison <- mice::D1(fit_full, fit_reduced)
# View results
print(BAT12Ment_MImodelAC_comparison)

# -- Exhaustion: A vs C --
# Full model
fit_full <- BAT12Exh_MImodel_C_CST # model with more predictors
# Reduced model
fit_reduced <- BAT12Exh_MImodel_A_Cov # nested model
# Run D1 test
BAT12Exh_MImodelAC_comparison <- mice::D1(fit_full, fit_reduced)
# View results
print(BAT12Exh_MImodelAC_comparison)
# A to D: does adding the CFS (coping flexibility) predictors (model D)
# improve on the covariates-only model (model A)? mice::D1 Wald comparison,
# one per BAT-12 subscale. Model objects are fitted elsewhere in this file.

# -- Cognitive impairment: A vs D --
# Full model
fit_full <- BAT12Cog_MImodel_D_CFS # model with more predictors
# Reduced model
fit_reduced <- BAT12Cog_MImodel_A_Cov # nested model
# Run D1 test
BAT12Cog_MImodelAD_comparison <- mice::D1(fit_full, fit_reduced)
# View results
print(BAT12Cog_MImodelAD_comparison)

# -- Emotional impairment: A vs D --
# Full model
fit_full <- BAT12Emo_MImodel_D_CFS # model with more predictors
# Reduced model
fit_reduced <- BAT12Emo_MImodel_A_Cov # nested model
# Run D1 test
BAT12Emo_MImodelAD_comparison <- mice::D1(fit_full, fit_reduced)
# View results
print(BAT12Emo_MImodelAD_comparison)

# -- Mental distance: A vs D --
# Full model
fit_full <- BAT12Ment_MImodel_D_CFS # model with more predictors
# Reduced model
fit_reduced <- BAT12Ment_MImodel_A_Cov # nested model
# Run D1 test
BAT12Ment_MImodelAD_comparison <- mice::D1(fit_full, fit_reduced)
# View results
print(BAT12Ment_MImodelAD_comparison)

# -- Exhaustion: A vs D --
# Full model
fit_full <- BAT12Exh_MImodel_D_CFS # model with more predictors
# Reduced model
fit_reduced <- BAT12Exh_MImodel_A_Cov # nested model
# Run D1 test
BAT12Exh_MImodelAD_comparison <- mice::D1(fit_full, fit_reduced)
# View results
print(BAT12Exh_MImodelAD_comparison)
# A to E: does the full model with all predictor sets (model E) improve on
# the covariates-only model (model A)? mice::D1 Wald comparison per subscale.
# Model objects are fitted elsewhere in this file.

# -- Cognitive impairment: A vs E --
# Full model
fit_full <- BAT12Cog_MImodel_E_all # model with more predictors
# Reduced model
fit_reduced <- BAT12Cog_MImodel_A_Cov # nested model
# Run D1 test
BAT12Cog_MImodelAE_comparison <- mice::D1(fit_full, fit_reduced)
# View results
print(BAT12Cog_MImodelAE_comparison)

# -- Emotional impairment: A vs E --
# Full model
fit_full <- BAT12Emo_MImodel_E_all # model with more predictors
# Reduced model
fit_reduced <- BAT12Emo_MImodel_A_Cov # nested model
# Run D1 test
BAT12Emo_MImodelAE_comparison <- mice::D1(fit_full, fit_reduced)
# View results
print(BAT12Emo_MImodelAE_comparison)
# Mental distance: A vs E comparison (mice::D1 Wald test) -- does the full
# model with all predictor sets improve on the covariates-only model?
# Full model
fit_full <- BAT12Ment_MImodel_E_all # model with more predictors
# Reduced model
fit_reduced <- BAT12Ment_MImodel_A_Cov # nested model
# Run D1 test
BAT12Ment_MImodelAE_comparison <- mice::D1(fit_full, fit_reduced)
# View results
# BUG FIX: the original printed BAT12Emo_MImodelAE_comparison here (copy-paste
# slip), so the Emotional-subscale A-vs-E result was shown twice and the
# Mental Distance A-vs-E result was never displayed.
print(BAT12Ment_MImodelAE_comparison)
# Exhaustion: A vs E comparison (mice::D1 Wald test) -- does the full model
# with all predictor sets improve on the covariates-only model?
# Full model
fit_full <- BAT12Exh_MImodel_E_all # model with more predictors
# Reduced model
fit_reduced <- BAT12Exh_MImodel_A_Cov # nested model
# Run D1 test
BAT12Exh_MImodelAE_comparison <- mice::D1(fit_full, fit_reduced)
# View results
print(BAT12Exh_MImodelAE_comparison)
# Count the number of unique participants per gender category.
# NOTE(review): the original comment said "men (0) and women (1)", but the
# code and the tables later in this document use the coding
# 1 = man, 2 = woman, 3 = non-binary, 4 = prefer not to disclose.
gender_counts <- df %>%
  group_by(PID) %>%
  summarise(T1_Gender = first(T1_Gender)) %>% # one gender value per unique participant
  summarise(
    Man = sum(T1_Gender == 1, na.rm = TRUE),
    Woman = sum(T1_Gender == 2, na.rm = TRUE),
    Nonbinary = sum(T1_Gender == 3, na.rm = TRUE),
    NotDisclosed = sum(T1_Gender == 4, na.rm = TRUE)
  )
# View the result
print(gender_counts)
# A tibble: 1 × 4
Man Woman Nonbinary NotDisclosed
<int> <int> <int> <int>
1 118 209 8 2
Code
# Count how many unique PIDs endorsed each ethnicity.
# The (commented-out) pipeline below split the comma-separated T1_Ethnicity
# field into indicator columns, collapsed to one row per PID, counted
# endorsements per category, attached readable labels, and saved the result.
# df <- df %>%
#   separate_rows(T1_Ethnicity, sep = ",") %>%     # Split values into separate rows
#   mutate(T1_Ethnicity = as.numeric(T1_Ethnicity)) %>%  # Convert to numeric
#   mutate(Indicator = 1) %>%                      # Create an indicator for presence
#   pivot_wider(names_from = T1_Ethnicity, values_from = Indicator,
#               values_fill = 0, names_prefix = "T1_Ethnicity_")  # Pivot to wide format
# pid_summary <- df %>%
#   group_by(PID) %>%
#   summarise(across(starts_with("T1_Ethnicity_"), ~any(. == 1, na.rm = TRUE))) %>%
#   ungroup()
# ethnicity_counts <- pid_summary %>%
#   summarise(across(starts_with("T1_Ethnicity_"), ~sum(. == TRUE)))
# ethnicity_counts_named <- ethnicity_counts %>%
#   rename(
#     "White" = T1_Ethnicity_1,
#     "Hispanic or Latino" = T1_Ethnicity_2,
#     "Black or African American" = T1_Ethnicity_3,
#     "African" = T1_Ethnicity_4,
#     "Asian" = T1_Ethnicity_5,
#     "South Asian" = T1_Ethnicity_6,
#     "Pacific Islander" = T1_Ethnicity_7,
#     "Middle Eastern" = T1_Ethnicity_9,
#     "Other" = T1_Ethnicity_10
#   )
# print(ethnicity_counts_named)
# write.csv(ethnicity_counts_named, "~/Clin PhD/Research Project/Study. 3 Quantitative Study ER flex/Data/FlexPaper/ethnicity_counts4.csv", row.names = FALSE)

# Define income bracket labels (index i corresponds to T1_Income == i)
income_labels <- c("Less than $10,000", "$10,000 to $19,999", "$20,000 to $29,999", "$30,000 to $39,999", "$40,000 to $49,999", "$50,000 to $59,999", "$60,000 to $69,999", "$70,000 to $79,999", "$80,000 to $89,999", "$90,000 to $99,999", "$100,000 to $149,999", "$150,000 or more")

# One income value per PID.
# NOTE(review): the original comment said "first non-NA income level", but
# min() takes the LOWEST reported bracket per PID, not the first -- these
# differ if a PID reported different brackets; confirm which was intended.
income_summary <- df %>%
  group_by(PID) %>%
  summarise(T1_Income = min(T1_Income, na.rm = TRUE)) %>% # lowest valid income level per PID
  ungroup()
# min() over an all-NA group returns Inf (with a warning); drop those PIDs
income_summary <- income_summary %>%
  filter(!is.infinite(T1_Income))
# Count occurrences of each income level
income_counts <- income_summary %>%
  count(T1_Income, name = "Count") %>%
  arrange(T1_Income)
# Assign labels
income_counts <- income_counts %>%
  mutate(Income_Bracket = income_labels[T1_Income])
# Print result
print(income_counts)
# A tibble: 12 × 3
T1_Income Count Income_Bracket
<dbl> <int> <chr>
1 1 21 Less than $10,000
2 2 10 $10,000 to $19,999
3 3 15 $20,000 to $29,999
4 4 21 $30,000 to $39,999
5 5 15 $40,000 to $49,999
6 6 15 $50,000 to $59,999
7 7 19 $60,000 to $69,999
8 8 19 $70,000 to $79,999
9 9 23 $80,000 to $89,999
10 10 27 $90,000 to $99,999
11 11 88 $100,000 to $149,999
12 12 64 $150,000 or more
Code
# write.csv(income_counts, "~/Clin PhD/Research Project/Study. 3 Quantitative Study ER flex/Data/FlexPaper/income_counts3.csv", row.names = FALSE)

# Define employment category bracket labels (index i corresponds to
# T1_Employment_Cat == i)
employment_labels <- c("Executive, administrator, or senior manager (e.g., CEO, sales VP, plant manager)", "Professional (e.g., engineer, accountant, systems analyst)", "Technical support (e.g., lab technician, legal assistant, computer programmer)", "Sales (e.g., sales representative, stockbroker, retail sales)", "Clerical and administrative support (e.g., secretary, billing clerk, office supervisor)", "Service occupation (e.g., security officer, food service worker, janitor)", "Precision production and crafts worker (e.g., mechanic, carpenter, machinist)", "Chemical/Production Operator (e.g., shift supervisors and hourly employees)", "Labourer (e.g., truck driver, construction worker)", "Healthcare professional (e.g., doctor, nurse, psychologist, support worker)", "Student")

# One employment category per PID.
# NOTE(review): as with income, the original comment said "first non-NA", but
# min() takes the LOWEST category code per PID -- confirm intent.
employment_summary <- df %>%
  group_by(PID) %>%
  summarise(T1_Employment_Cat = min(T1_Employment_Cat, na.rm = TRUE)) %>% # lowest valid category per PID
  ungroup()
# min() over an all-NA group returns Inf (with a warning); drop those PIDs
employment_summary <- employment_summary %>%
  filter(!is.infinite(T1_Employment_Cat))
# Count occurrences of each employment category
employment_counts <- employment_summary %>%
  count(T1_Employment_Cat, name = "Count") %>%
  arrange(T1_Employment_Cat)
# Assign labels
employment_counts <- employment_counts %>%
  mutate(Employment_Category = employment_labels[T1_Employment_Cat])
# Print result
print(employment_counts)
# A tibble: 11 × 3
T1_Employment_Cat Count Employment_Category
<dbl> <int> <chr>
1 1 4 Executive, administrator, or senior manager (e.g., C…
2 2 93 Professional (e.g., engineer, accountant, systems an…
3 3 15 Technical support (e.g., lab technician, legal assis…
4 4 20 Sales (e.g., sales representative, stockbroker, reta…
5 5 38 Clerical and administrative support (e.g., secretary…
6 6 19 Service occupation (e.g., security officer, food ser…
7 7 1 Precision production and crafts worker (e.g., mechan…
8 8 2 Chemical/Production Operator (e.g., shift supervisor…
9 9 9 Labourer (e.g., truck driver, construction worker)
10 10 38 Healthcare professional (e.g., doctor, nurse, psycho…
11 11 98 Student
Code
# write.csv(employment_counts, "~/Clin PhD/Research Project/Study. 3 Quantitative Study ER flex/Data/FlexPaper/employment_counts2.csv", row.names = FALSE)

# Work and Study summary: counts of "Study only" / "Work & Study" / "Work only"
df %>% count(T1_EmploymentMI)
# A tibble: 3 × 2
T1_EmploymentMI n
<chr> <int>
1 Study only 49
2 Work & Study 72
3 Work only 216
Code
# How long did the T1 survey take, on average? (duration in seconds)
mean(df$T1_Durationinseconds, na.rm = TRUE)
# ---------------------------------------------------------------------------
# Spearman correlation matrix (complete-case data, pairwise deletion) with
# significance stars, rendered as a markdown table.
# ---------------------------------------------------------------------------

# Variables of interest: T1 burnout subscales (BAT-12), perceived stress,
# age, COPE strategy scales, coping flexibility scales, and CST switch cost.
vars <- c("T1_BAT12_Cognitive", "T1_BAT12_Emotion", "T1_BAT12_MentalDistance", "T1_BAT12_Exhaustion",
          "T1_PSS4", "Age_2024", "T1_COPE_SelfDistraction", "T1_COPE_ActiveCoping",
          "T1_COPE_Denial", "T1_COPE_SubstanceUse", "T1_COPE_UseOfEmotionalSupport",
          "T1_COPE_BehavioralDisengagement", "T1_COPE_Religion", "T1_COPE_Venting",
          "T1_COPE_UseOfInstrumentalSupport", "T1_COPE_PositiveReframing",
          "T1_COPE_SelfBlame", "T1_COPE_Planning", "T1_COPE_Humor", "T1_COPE_Acceptance",
          "T1_CFS_Adaptive", "T1_CFS_Evaluation_new", "CST_switchcost.1")

# Subset data (note: the complete-case dataset df_CC, not df)
df_subset <- df_CC[, vars]

# Spearman correlations with pairwise deletion
cor_results <- psych::corr.test(df_subset, method = "spearman", use = "pairwise")

# Extract correlation and p-value matrices
r_mat <- cor_results$r
p_mat <- cor_results$p

# Format one correlation to 2 decimals with significance stars:
# *** p < .001, ** p < .01, * p < .05.
add_stars <- function(r, p) {
  stars <- ifelse(p < .001, "***",
           ifelse(p < .01, "**",
           ifelse(p < .05, "*", "")))
  paste0(format(round(r, 2), nsmall = 2), stars)
}

# Apply stars cell by cell.
# IMPROVED: seq_len() replaces the 1:nrow()/1:ncol() anti-pattern (safe for
# zero-extent matrices). The loop is deliberately NOT replaced by a single
# vectorized add_stars(r_mat, p_mat) call: format() applied to a whole matrix
# pads every cell to a common width, which would change the output strings.
r_with_stars <- matrix(nrow = nrow(r_mat), ncol = ncol(r_mat))
for (i in seq_len(nrow(r_mat))) {
  for (j in seq_len(ncol(r_mat))) {
    r_with_stars[i, j] <- add_stars(r_mat[i, j], p_mat[i, j])
  }
}

# Make it a nicely labeled data frame
rownames(r_with_stars) <- colnames(r_with_stars) <- colnames(r_mat)
r_with_stars_df <- as.data.frame(r_with_stars)

# Optional: blank the upper triangle (and diagonal) for a cleaner table
r_with_stars_df[upper.tri(r_with_stars_df, diag = TRUE)] <- ""

# Print as publication-style table
knitr::kable(r_with_stars_df, format = "markdown",
             caption = "Spearman Correlation Matrix with Significance Stars")
Spearman Correlation Matrix with Significance Stars
T1_BAT12_Cognitive
T1_BAT12_Emotion
T1_BAT12_MentalDistance
T1_BAT12_Exhaustion
T1_PSS4
Age_2024
T1_COPE_SelfDistraction
T1_COPE_ActiveCoping
T1_COPE_Denial
T1_COPE_SubstanceUse
T1_COPE_UseOfEmotionalSupport
T1_COPE_BehavioralDisengagement
T1_COPE_Religion
T1_COPE_Venting
T1_COPE_UseOfInstrumentalSupport
T1_COPE_PositiveReframing
T1_COPE_SelfBlame
T1_COPE_Planning
T1_COPE_Humor
T1_COPE_Acceptance
T1_CFS_Adaptive
T1_CFS_Evaluation_new
CST_switchcost.1
T1_BAT12_Cognitive
T1_BAT12_Emotion
0.36***
T1_BAT12_MentalDistance
0.55***
0.43***
T1_BAT12_Exhaustion
0.57***
0.39***
0.69***
T1_PSS4
0.42***
0.40***
0.54***
0.48***
Age_2024
-0.05
0.13*
0.07
-0.05
-0.03
T1_COPE_SelfDistraction
0.26***
0.09
0.15*
0.18**
0.13*
-0.07
T1_COPE_ActiveCoping
0.08
0.08
0.07
0.11
-0.08
-0.02
0.35***
T1_COPE_Denial
0.13
0.28***
0.15*
0.11
0.22***
0.00
0.15*
0.18**
T1_COPE_SubstanceUse
0.16*
0.22***
0.13*
0.10
0.21**
0.12
0.13*
-0.04
0.25***
T1_COPE_UseOfEmotionalSupport
-0.03
0.10
0.04
0.02
-0.15*
-0.04
0.31***
0.39***
0.14*
0.07
T1_COPE_BehavioralDisengagement
0.23***
0.17**
0.23***
0.22***
0.31***
-0.07
0.21***
0.00
0.31***
0.20**
0.01
T1_COPE_Religion
0.00
0.15*
0.02
0.02
0.05
-0.01
0.18**
0.28***
0.27***
0.12
0.19**
0.12
T1_COPE_Venting
0.13*
0.24***
0.17**
0.17*
0.10
-0.08
0.37***
0.41***
0.27***
0.17*
0.46***
0.19**
0.24***
T1_COPE_UseOfInstrumentalSupport
-0.01
0.05
0.02
0.02
-0.08
-0.08
0.34***
0.43***
0.17**
0.03
0.78***
0.05
0.23***
0.50***
T1_COPE_PositiveReframing
-0.07
0.11
-0.08
-0.01
-0.13
0.02
0.25***
0.44***
0.13*
0.02
0.43***
-0.06
0.31***
0.35***
0.41***
T1_COPE_SelfBlame
0.18**
0.18**
0.19**
0.16*
0.29***
-0.04
0.42***
0.29***
0.27***
0.14*
0.28***
0.28***
0.09
0.39***
0.31***
0.15*
T1_COPE_Planning
0.05
0.14*
0.08
0.08
-0.01
0.06
0.39***
0.65***
0.16*
-0.02
0.44***
0.04
0.24***
0.41***
0.47***
0.55***
0.38***
T1_COPE_Humor
0.17**
0.03
0.19**
0.10
0.14*
-0.04
0.22***
0.11
0.19**
0.10
0.18**
0.18**
0.05
0.28***
0.26***
0.17**
0.27***
0.16*
T1_COPE_Acceptance
0.04
0.04
0.09
0.10
-0.03
0.05
0.35***
0.44***
-0.03
-0.01
0.37***
-0.02
0.13*
0.37***
0.42***
0.35***
0.21**
0.49***
0.24***
T1_CFS_Adaptive
-0.10
-0.02
-0.07
-0.02
-0.23***
-0.05
0.15*
0.45***
0.00
-0.09
0.28***
-0.16*
0.07
0.26***
0.31***
0.30***
0.03
0.37***
0.11
0.36***
T1_CFS_Evaluation_new
-0.12
0.02
-0.09
-0.08
-0.17**
-0.07
0.09
0.37***
0.04
-0.10
0.28***
-0.11
0.14*
0.23***
0.25***
0.37***
0.09
0.29***
0.04
0.28***
0.61***
CST_switchcost.1
0.09
0.04
0.11
0.10
0.04
0.00
0.09
0.19**
0.09
0.00
0.10
0.03
0.05
0.11
0.09
0.04
0.11
0.11
0.08
0.10
0.08
0.10
Code
# Save the starred correlation table next to the rendered document
# (relative path -- depends on the working directory set when reading data).
write.csv(r_with_stars_df, "correlation_table.csv")
# Strip the "T1_" prefix from row/column names for presentation.
# NOTE(review): this runs AFTER write.csv, so the saved CSV still carries the
# T1_ prefixes -- confirm that is intended.
colnames(r_with_stars_df) <- gsub("^T1_", "", colnames(r_with_stars_df))
rownames(r_with_stars_df) <- gsub("^T1_", "", rownames(r_with_stars_df))
12 Initial Steps - Commented out
12.1 Data Preparation
The following sections outline data processing steps, including error correction and variable recoding. These checks were performed on the original dataset, updated accordingly through this code, and saved as a cleaned dataset.
Code
# Addressing fake participants.
# Original cleaning (commented out): count and remove rows flagged as fake
# (Include == 0 or -1).
# count_fake <- df %>% filter(Include %in% c(0, -1)) %>% nrow()
# print(count_fake)
# df <- df %>% filter(!Include %in% c(0, -1))

# Check that removal worked: should print 0 on the cleaned dataset
count_fake_check <- df %>% filter(Include %in% c(0, -1)) %>% nrow()
print(count_fake_check)
[1] 0
Code
# Adjust Age variable (original cleaning, commented out):
# - one participant (PID 63e57265347f612359b7c4ec) had DOB entered as 19888,
#   corrected to 1988; two others had DOB 1961 and 1980;
# - Age_2024 computed as 2024 - T1_DOB;
# - participants over 36 years old were excluded.
# table(df$T1_DOB)
# df$T1_DOB[df$PID == "63e57265347f612359b7c4ec"] <- 1988
# table(df$T1_DOB)
# df$T1_DOB <- as.numeric(df$T1_DOB)
# df$Age_2024 <- 2024 - df$T1_DOB
# table(df$Age_2024)
# df <- df %>%
#   filter(Age_2024 <= 36)

# Check the age distribution on the cleaned dataset
table(df$Age_2024)
# Change the gender variable (original cleaning, commented out).
# Original coding: man = 1, woman = 2, non-binary = 3, prefer not to say = 4.
# T1_Gender_MW collapsed categories 3 and 4 into 2 (woman).
# df <- df %>%
#   mutate(T1_Gender_MW = ifelse(T1_Gender %in% c(3, 4), 2, T1_Gender))
# Checks run at the time:
# summary(df$T1_Gender == 3); summary(df$T1_Gender == 4)
# summary(df$T1_Gender_MW == 3); summary(df$T1_Gender_MW == 4)
# NOTE(review): the original comment recorded "10x Gender = 3", but the table
# below shows 8 -- the comment presumably refers to an earlier version of the
# dataset; confirm.

# Distribution of the original 4-category gender variable
table(df$T1_Gender)
1 2 3 4
118 209 8 2
Code
# Distribution of the 2-category sex variable.
# NOTE(review): coding is 1/2 -- presumably 1 = male, 2 = female (the counts
# match the man/woman gender counts above); confirm against the codebook.
table(df$T1_Sex)
1 2
118 219
Code
# Distribution of the collapsed man/woman gender variable (T1_Gender_MW)
table(df$T1_Gender_MW)
Man Woman
118 219
Code
#after mutate, 132x Gender = 1, there were 245x Gender = 3# #Exclude non employed people## df %>%# filter(T1_Employment_recoded == 4) %>%# summarise(n = n())# # #view the PIDs# simple.table <- df[df$T1_Employment_recoded == 4, c("PID", "T1_Employment_recoded")]# # #exclude# df <- df %>%# filter(T1_Employment_recoded != 4)# # ###Exclude based on CST not better than chance# simple.table <- df[c('PID', 'CST_correct.1', "T1_CFS_1")]# # #exclude rows where CST_correct.1 < 50 and not NA# sum(df$CST_correct.1 <= 50, na.rm = TRUE)# sum(is.na(df$CST_correct.1))# # #Keep all participants who either didn’t complete the CST (missing), or who %correct was at least 50# df <- df %>% # filter(is.na(CST_correct.1) | CST_correct.1 >= 50)
12.2 Scoring
The following section checks variable scoring and recodes where appropriate. These checks were applied to the original dataset, updated via this code, and saved as a cleaned dataset.
The following section ensures that variables are in the correct format to produce the regressions whose results informed the selection of variables included in the multiple imputation. These processes were applied to the original dataset, updated via this code, and saved as a cleaned dataset.
Code
# Binge drinking (original scoring, commented out): recode T1_Binge_Freq into
# T1_Binge_Freq_MI, a 0-3 count of bingeing in the last 2 weeks; assign 0 when
# the item was skipped because the person reported no drinking in the last
# 2 weeks (T1_Substance_Q2 in 1:2). Same logic for T2.
# simple.table <- df[, c("PID", "T1_Binge_Freq", "T1_Substance_Q2", "T1_Binge_Freq_MI")]
# df <- df %>%
#   mutate(
#     T1_Binge_Freq_MI = case_when(
#       T1_Substance_Q2 %in% c(1, 2) ~ 0,            # No use -> 0
#       T1_Binge_Freq == 1 ~ 0,                      # Did not binge -> 0
#       T1_Binge_Freq %in% 2:4 ~ T1_Binge_Freq - 1,  # Shift 2:4 -> 1:3
#       TRUE ~ NA_real_
#     ),
#     T2_Binge_Freq_MI = case_when(
#       T2_Substance_Q2 %in% c(1, 2) ~ 0,
#       T2_Binge_Freq == 1 ~ 0,
#       T2_Binge_Freq %in% 2:4 ~ T2_Binge_Freq - 1,
#       TRUE ~ NA_real_
#     )
#   )
#
# ADNM8 (commented out): rescore items T1_ADNM8_A_1..17 from 1/2 to 0/1 and
# sum into T1_ADNM8_A; rows where ALL items are missing are set to NA
# (rowSums with na.rm = TRUE would otherwise give 0).
# adnm_items <- paste0("T1_ADNM8_A_", 1:17)
# df[adnm_items] <- lapply(df[adnm_items], function(x) as.numeric(as.character(x)))
# df[adnm_items] <- lapply(df[adnm_items], function(x) ifelse(x == 2, 1,
#                                                      ifelse(x == 1, 0, NA)))
# df$T1_ADNM8_A <- rowSums(df[, adnm_items], na.rm = TRUE)
# all_na <- apply(df[, adnm_items], 1, function(x) all(is.na(x)))
# df$T1_ADNM8_A[all_na] <- NA
#
# Ethnicity (commented out): recategorise the comma-separated T1_Ethnicity
# codes into three binary indicators via word-boundary regex matches --
# White (code 1), AsianPI (codes 5 Asian, 6 South Asian, 7 Pacific Islander),
# BlackOther (codes 2 Hispanic/Latino, 3 Black/African American, 4 African,
# 8 Aboriginal or Torres Strait, 9 Middle Eastern, 10 Other).
# df <- df %>%
#   mutate(
#     T1_Ethnicity = as.character(T1_Ethnicity),  # ensure character for regex
#     Ethnicity_White = case_when(
#       is.na(T1_Ethnicity) ~ NA_real_,
#       grepl("\\b1\\b", T1_Ethnicity) ~ 1,
#       TRUE ~ 0
#     ),
#     Ethnicity_AsianPI = case_when(
#       is.na(T1_Ethnicity) ~ NA_real_,
#       grepl("\\b5\\b", T1_Ethnicity) |
#       grepl("\\b6\\b", T1_Ethnicity) |
#       grepl("\\b7\\b", T1_Ethnicity) ~ 1,
#       TRUE ~ 0
#     ),
#     Ethnicity_BlackOther = case_when(
#       is.na(T1_Ethnicity) ~ NA_real_,
#       grepl("\\b2\\b", T1_Ethnicity) |
#       grepl("\\b3\\b", T1_Ethnicity) |
#       grepl("\\b4\\b", T1_Ethnicity) |
#       grepl("\\b8\\b", T1_Ethnicity) |
#       grepl("\\b9\\b", T1_Ethnicity) |
#       grepl("\\b10\\b", T1_Ethnicity) ~ 1,
#       TRUE ~ 0
#     )
#   )
#
# Education (commented out): collapse T1_Education into an ordered 4-level
# factor T1_EducationMI.
# df <- df %>%
#   mutate(
#     T1_EducationMI = case_when(
#       T1_Education %in% c(1, 2, 8) ~ "School only",
#       T1_Education %in% c(3, 4) ~ "Vocational",
#       T1_Education == 5 ~ "University (Bachelor)",
#       T1_Education %in% c(6, 7) ~ "University (Postgrad)",
#       TRUE ~ NA_character_
#     ),
#     T1_EducationMI = factor(
#       T1_EducationMI,
#       levels = c("School only", "Vocational",
#                  "University (Bachelor)", "University (Postgrad)")
#     )
#   )

# Check the recoded education variable, including missing values
table(df$T1_EducationMI, useNA = "ifany")
School only University (Bachelor) University (Postgrad)
81 145 76
Vocational
35